InĀ [26]:
# Load the libraries we will need
suppressPackageStartupMessages({
  library(tidyverse) 
  library(dplyr)
  library(plotly)
  library(ggplot2)
  library(viridis)
  library(naniar)
  library(hrbrthemes)
})
# Silence all warnings so they do not clutter the exported notebook.
# NOTE(review): a negative `warn` applies to the whole session, so warnings
# raised later (e.g. "NAs introduced by coercion" from as.numeric()) are
# hidden as well.
options(warn = -1)

Foundation Foods includes values for nutrients and other food components for a diverse range of basic foods (unprocessed or lightly processed foods) and provides extensive underlying metadata, including the number of samples, sampling location, date of collection, analytical approaches used, and if appropriate, agricultural information such as genotype and production practices.

InĀ [27]:
# Read the CoFID CSV export and normalise its column headers to snake_case
# identifiers via janitor (called with `::`, so no library() call is needed).
food_composition_raw <- janitor::clean_names(
  read.csv(
    "/kaggle/input/composition-of-foods-integrated-dataset-cofid/McCance_Widdowsons_Composition_of_Foods_Integrated_Dataset_2021.csv"
  )
)
InĀ [28]:
# Let's have a look at our data: the first rows, then every column with its
# type and leading values
head(food_composition_raw)
glimpse(food_composition_raw)
A data.frame: 6 Ɨ 56
food_codefood_namedescriptiongrouppreviousmain_data_referencesfootnotewater_gtotal_nitrogen_gprotein_g⋯cholesterol_mgxx_1x_2x_3x_4x_5x_6x_7x_8
<chr><chr><chr><chr><chr><chr><chr><chr><chr><chr>⋯<chr><lgl><lgl><lgl><lgl><lgl><lgl><lgl><lgl><lgl>
1 WATERTOTNIT PROT ⋯CHOL NANANANANANANANANA
2 WaterTotal nitrogenProtein⋯CholesterolNANANANANANANANANA
313-145Ackee, canned, drained 8 cans DG554MW4, 1978; and Vegetables, Herbs and Spices Supplement, 1991 76.7 0.46 2.9 ⋯0.0 NANANANANANANANANA
413-146Agar, dried Literature sources DG Wu Leung et al. (1972) Food composition table for use in East Asia, Food and Agriculature Organization and US Department of Health9.7 0.26 1.3 ⋯0.0 NANANANANANANANANA
513-147Agar, dried, soaked and drainedLiterature sources DG Wu Leung et al. (1972) Food composition table for use in East Asia, Food and Agriculature Organization and US Department of Health84.2 0.03 0.2 ⋯0.0 NANANANANANANANANA
613-148Alfalfa sprouts, raw Analytical and literature sourcesDG Vegetables, Herbs and Spices Supplement, 1991 93.4 0.64 4.0 ⋯0.0 NANANANANANANANANA
Rows: 2,889
Columns: 56
$ food_code                     <chr> "", "", "13-145", "13-146", "13-147", "1…
$ food_name                     <chr> "", "", "Ackee, canned, drained", "Agar,…
$ description                   <chr> "", "", "8 cans", "Literature sources", …
$ group                         <chr> "", "", "DG", "DG", "DG", "DG", "H", "GA…
$ previous                      <chr> "", "", "554", "", "", "", "", "14-801 1…
$ main_data_references          <chr> "", "", "MW4, 1978; and Vegetables, Herb…
$ footnote                      <chr> "", "", "", "", "", "", "", "", "", "", …
$ water_g                       <chr> "WATER", "Water", "76.7", "9.7", "84.2",…
$ total_nitrogen_g              <chr> "TOTNIT", "Total nitrogen", "0.46", "0.2…
$ protein_g                     <chr> "PROT", "Protein", "2.9", "1.3", "0.2", …
$ fat_g                         <chr> "FAT", "Fat", "15.2", "1.2", "0.1", "0.7…
$ carbohydrate_g                <chr> "CHO", "Carbohydrate", "0.8", "Tr", "Tr"…
$ energy_kcal_kcal              <chr> "KCALS", "kcal", "151", "16", "2", "24",…
$ energy_k_j_k_j                <chr> "KJ", "kJ", "625", "67", "7", "100", "N"…
$ starch_g                      <chr> "STAR", "Starch", "Tr", "0.0", "0.0", "T…
$ oligosaccharide_g             <chr> "OLIGO", "Oligosaccharide", "", "", "", …
$ total_sugars_g                <chr> "TOTSUG", "Total sugars", "0.8", "Tr", "…
$ glucose_g                     <chr> "GLUC", "Glucose", "0.1", "0.0", "0.0", …
$ galactose_g                   <chr> "GALACT", "Galactose", "0.0", "0.0", "0.…
$ fructose_g                    <chr> "FRUCT", "Fructose", "Tr", "0.0", "0.0",…
$ sucrose_g                     <chr> "SUCR", "Sucrose", "0.7", "0.0", "0.0", …
$ maltose_g                     <chr> "MALT", "Maltose", "0.0", "0.0", "0.0", …
$ lactose_g                     <chr> "LACT", "Lactose", "0.0", "0.0", "0.0", …
$ alcohol_g                     <chr> "ALCO", "Alcohol", "", "", "", "", "", "…
$ nsp_g                         <chr> "ENGFIB", "Non-starch polysaccharide", "…
$ aoac_fibre_g                  <chr> "AOACFIB", "AOAC fibre", "", "", "", "",…
$ satd_fa_100g_fa_g             <chr> "SATFAC", "Saturated fatty acids per 100…
$ satd_fa_100g_fd_g             <chr> "SATFOD", "Saturated fatty acids per 100…
$ n_6_poly_100g_fa_g            <chr> "TOTn6PFAC", "Total n-6 polyunsaturated …
$ n_6_poly_100g_food_g          <chr> "TOTn6PFOD", "Total n-6 polyunsaturated …
$ n_3_poly_100g_fa_g            <chr> "TOTn3PFAC", "Total n-3 polyunsaturated …
$ n_3_poly_100g_food_g          <chr> "TOTn3PFOD", "Total n-3 polyunsaturated …
$ cis_mono_fa_100g_fa_g         <chr> "MONOFACc", "cis-Monounsaturated fatty a…
$ cis_mono_fa_100g_food_g       <chr> "MONOFODc", "cis-Monounsaturated fatty a…
$ mono_fa_100g_fa_g             <chr> "MONOFAC", "Monounsaturated fatty acids …
$ mono_fa_100g_food_g           <chr> "MONOFOD", "Monounsaturated fatty acids …
$ cis_polyu_fa_100g_fa_g        <chr> "POLYFACc", "cis-Polyunsaturated fatty a…
$ cis_poly_fa_100g_food_g       <chr> "POLYFODc", "cis-Polyunsaturated fatty a…
$ poly_fa_100g_fa_g             <chr> "POLYFAC", "Polyunsaturated fatty acids …
$ poly_fa_100g_food_g           <chr> "POLYFOD", "Polyunsaturated fatty acids …
$ sat_fa_excl_br_100g_fa_g      <chr> "SATFACx6", "Saturated fatty acids exclu…
$ sat_fa_excl_br_100g_food_g    <chr> "SATFODx6", "Saturated fatty acids exclu…
$ branched_chain_fa_100g_fa_g   <chr> "TOTBRFAC", "Total branched chain per 10…
$ branched_chain_fa_100g_food_g <chr> "TOTBRFOD", "Total branched chain per 10…
$ trans_f_as_100g_fa_g          <chr> "FACTRANS", "Total Trans fatty acids per…
$ trans_f_as_100g_food_g        <chr> "FODTRANS", "Total Trans fatty acids per…
$ cholesterol_mg                <chr> "CHOL", "Cholesterol", "0.0", "0.0", "0.…
$ x                             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ x_1                           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ x_2                           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ x_3                           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ x_4                           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ x_5                           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ x_6                           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ x_7                           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ x_8                           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
InĀ [29]:
# Build the working subset: drop the descriptive rows, exclude two food
# groups, and keep only the columns used in the rest of the analysis.
food_composition <- food_composition_raw %>%
  # The first two rows only repeat nutrient codes and long names, so they are
  # unnecessary here
  slice(-(1:2)) %>%
  # Exclude herbs and spices (H) and flours and grains (AA): they are used in
  # nutritionally insignificant quantities for flavouring or colouring, or as
  # ingredients, rather than eaten as standalone foods
  filter(!is.element(group, c("H", "AA"))) %>%
  # Columns of interest
  select(food_name, aoac_fibre_g, energy_kcal_kcal, total_sugars_g, group)
InĀ [30]:
# Let's have a look at our subset: the first rows, then the column types
# (all columns are still character at this point)
head(food_composition)
glimpse(food_composition)
A data.frame: 6 Ɨ 5
food_nameaoac_fibre_genergy_kcal_kcaltotal_sugars_ggroup
<chr><chr><chr><chr><chr>
1Ackee, canned, drained 1510.8DG
2Agar, dried 16 Tr DG
3Agar, dried, soaked and drained 2 Tr DG
4Alfalfa sprouts, raw 24 0.3DG
5Almonds, flaked and ground N 6124.2GA
6Almonds, toasted 10.95795.1GA
Rows: 2,797
Columns: 5
$ food_name        <chr> "Ackee, canned, drained", "Agar, dried", "Agar, dried…
$ aoac_fibre_g     <chr> "", "", "", "", "N", "10.9", "4.6", "12.5", "", "", "…
$ energy_kcal_kcal <chr> "151", "16", "2", "24", "612", "579", "205", "554", "…
$ total_sugars_g   <chr> "0.8", "Tr", "Tr", "0.3", "4.2", "5.1", "1.6", "4.5",…
$ group            <chr> "DG", "DG", "DG", "DG", "GA", "GA", "GA", "GA", "DG",…

We notice that some values are either empty or given as 'N', which stands for "not available". Let's have a look at how missing values are encoded in the data, for example in the 'aoac_fibre_g' column:

InĀ [31]:
# Print naniar's built-in vector of strings that commonly stand in for a
# missing value (`common_na_strings` is exported by the naniar package)
common_na_strings
  1. 'missing'
  2. 'NA'
  3. 'N A'
  4. 'N/A'
  5. '#N/A'
  6. 'NA '
  7. ' NA'
  8. 'N /A'
  9. 'N / A'
  10. ' N / A'
  11. 'N / A '
  12. 'na'
  13. 'n a'
  14. 'n/a'
  15. 'na '
  16. ' na'
  17. 'n /a'
  18. 'n / a'
  19. ' a / a'
  20. 'n / a '
  21. 'NULL'
  22. 'null'
  23. ''
  24. '\\?'
  25. '\\*'
  26. '\\.'
InĀ [32]:
# Extend naniar's built-in NA strings with the markers specific to this dataset:
#   "Tr" - presumably "trace"; treated as missing in this analysis
#   "N"  - used by the dataset where a value is not available
# "N" must be listed explicitly. Otherwise it survives the NA replacement and
# only becomes NA as a side effect of the later as.numeric()/as.integer()
# coercion, which is silent here because warnings are switched off.
common_na_strings_exp <- c(common_na_strings, "Tr", "N")
common_na_strings_exp
  1. 'missing'
  2. 'NA'
  3. 'N A'
  4. 'N/A'
  5. '#N/A'
  6. 'NA '
  7. ' NA'
  8. 'N /A'
  9. 'N / A'
  10. ' N / A'
  11. 'N / A '
  12. 'na'
  13. 'n a'
  14. 'n/a'
  15. 'na '
  16. ' na'
  17. 'n /a'
  18. 'n / a'
  19. ' a / a'
  20. 'n / a '
  21. 'NULL'
  22. 'null'
  23. ''
  24. '\\?'
  25. '\\*'
  26. '\\.'
  27. 'Tr'
InĀ [33]:
# Check how missing values are encoded in the subset.
# Count the cells whose value is exactly one of the NA strings. Each column is
# tested separately with %in%: calling grepl() on the whole data frame coerces
# every column to a single string, and the alternation pattern contains ''
# (which matches anything), so that approach only returns the number of columns.
na_string_hits <- vapply(
  food_composition,
  function(column) sum(column %in% common_na_strings_exp),
  integer(1)
)
print(paste("Total na offending string instances", sum(na_string_hits)))
# Out of which given as 'NA' are
print(paste("Given as 'NA' are", sum(is.na(food_composition), na.rm = FALSE)))
# While given as '' are
print(paste("Given as '' are", sum(food_composition == '', na.rm = TRUE)))
# While given as 'N' are
print(paste("Given as 'N' are", sum(food_composition == 'N', na.rm = TRUE)))
# And null are (is.null() tests the object as a whole, so this is 0 for any
# existing data frame):
print(paste("Null are", sum(is.null(food_composition), na.rm = TRUE)))
[1] "Total na offending string instances 5"
[1] "Given as 'NA' are 0"
[1] "Given as '' are 827"
[1] "Given as 'N' are 519"
[1] "Null are 0"

Let's make sure all missing values are given as NA, rather than e.g. "N":

InĀ [34]:
# Turn every cell that exactly matches one of the NA strings into a real NA
# (naniar applies the condition to each column of the data frame)
food_composition <- food_composition %>%
  replace_with_na_all(condition = ~ .x %in% common_na_strings_exp)
# Sanity check: total number of NA cells after the replacement
sum(is.na(food_composition))
1054

Finally, let's convert the fibre and sugar columns to decimal type and the kcal column to integer:

InĀ [35]:
# Parse the character columns into numeric types
food_composition <- food_composition %>%
  mutate(
    # Fibre and sugar contents become doubles
    across(c(aoac_fibre_g, total_sugars_g), as.numeric),
    # Energy (kcal) becomes an integer
    energy_kcal_kcal = as.integer(energy_kcal_kcal)
  )

# Confirm the new column types
glimpse(food_composition)
Rows: 2,797
Columns: 5
$ food_name        <chr> "Ackee, canned, drained", "Agar, dried", "Agar, dried…
$ aoac_fibre_g     <dbl> NA, NA, NA, NA, NA, 10.9, 4.6, 12.5, NA, NA, NA, 0.0,…
$ energy_kcal_kcal <int> 151, 16, 2, 24, 612, 579, 205, 554, 16, 18, 58, 191, …
$ total_sugars_g   <dbl> 0.8, NA, NA, 0.3, 4.2, 5.1, 1.6, 4.5, 0.2, 0.2, NA, 0…
$ group            <chr> "DG", "DG", "DG", "DG", "GA", "GA", "GA", "GA", "DG",…
InĀ [36]:
# Let's check our subset at this point (after NA replacement and type
# conversion)
head(food_composition)
A tibble: 6 Ɨ 5
food_nameaoac_fibre_genergy_kcal_kcaltotal_sugars_ggroup
<chr><dbl><int><dbl><chr>
Ackee, canned, drained NA1510.8DG
Agar, dried NA 16 NADG
Agar, dried, soaked and drained NA 2 NADG
Alfalfa sprouts, raw NA 240.3DG
Almonds, flaked and ground NA6124.2GA
Almonds, toasted 10.95795.1GA
InĀ [Ā ]:
 

To get an understanding of the number of missing values, we'll use the Amelia library:

InĀ [37]:
# Amelia provides missmap(), which draws a map of missing vs. observed cells
library(Amelia)
# Enlarge the notebook plotting area so the map stays readable
options(repr.plot.width = 14, repr.plot.height = 12)

missmap(
  food_composition,
  main = "Food Composition - Missing Values",
  col = c("#ffb55a", "#7eb0d5"),
  legend = TRUE
)
No description has been provided for this image

Many fibre values are missing; however, the values that are present still allow us to work with the dataset, so let's remove the rows that contain missing values.

InĀ [38]:
# Keep only complete rows: with no columns named, drop_na() removes a row if
# any of its columns is NA
food_composition <- drop_na(food_composition)
InĀ [39]:
# Re-draw the missingness map to confirm no NA values remain in the dataset
missmap(
  food_composition,
  main = "Food Composition - Missing Values",
  col = c("#ffb55a", "#7eb0d5"),
  legend = TRUE
)
No description has been provided for this image
InĀ [40]:
# Plausibility check: show the foods with the highest fibre content first
food_composition %>%
  arrange(desc(aoac_fibre_g)) %>%
  head()
A tibble: 6 Ɨ 5
food_nameaoac_fibre_genergy_kcal_kcaltotal_sugars_ggroup
<chr><dbl><int><dbl><chr>
Green beans, dried 35.024622.6DI
Breakfast cereal, bran type cereal, fortified24.626720.0AI
Crispbread, rye 20.0284 3.4AM
Onions, dried, raw 19.330954.4DG
Lentils, red, split, dried, raw 17.4311 1.3DB
Breakfast cereal, bran flakes, fortified 13.433321.0AI
InĀ [41]:
# Named character vector mapping each food group code used in the dataset to
# its printable group name. Look up a name with lookup_list[["<code>"]].
lookup_list <- c(
  # A*: flours, breads, cereals and baked goods
  "AA" = "Flours, grains and starches",
  "AB" = "Sandwiches",
  "AC" = "Rice",
  "AD" = "Pasta",
  "AE" = "Pizzas",
  "AF" = "Breads",
  "AG" = "Rolls",
  "AI" = "Breakfast cereals",
  "AK" = "Infant cereal foods",
  "AM" = "Biscuits",
  "AN" = "Cakes",
  "AO" = "Pastry",
  "AP" = "Buns and pastries",
  "AS" = "Puddings",
  "AT" = "Savouries",
  # B*: milks, creams, cheeses, yogurts and related products
  "BA" = "Cows milk",
  "BAB" = "Breakfast milk",
  "BAE" = "Skimmed milk",
  "BAH" = "Semi-skimmed milk",
  "BAK" = "Whole milk",
  "BAN" = "Channel Island milk",
  "BAR" = "Processed milks",
  "BC" = "Other milks",
  "BF" = "Infant formulas",
  "BFD" = "Whey-based modified milks",
  "BFG" = "Non-whey-based modified milks",
  "BFJ" = "Soya-based modified milks",
  "BFP" = "Follow-on formulas",
  "BH" = "Milk-based drinks",
  "BJ" = "Creams",
  "BJC" = "Fresh creams (pasteurised)",
  "BJF" = "Frozen creams (pasteurised)",
  "BJL" = "Sterilised creams",
  "BJP" = "UHT creams",
  "BJS" = "Imitation creams",
  "BL" = "Cheeses",
  "BN" = "Yogurts",
  "BNE" = "Whole milk yogurts",
  "BNH" = "Low fat yogurts",
  "BNS" = "Other yogurts",
  "BP" = "Ice creams",
  "BR" = "Puddings and chilled desserts",
  "BV" = "Savoury dishes and sauces",
  # C*: eggs and egg dishes
  "CA" = "Eggs",
  "CD" = "Egg dishes",
  "CDE" = "Savoury egg dishes",
  "CDH" = "Sweet egg dishes",
  # D*: potatoes, pulses and vegetables
  "DA" = "Potatoes",
  "DAE" = "Early potatoes",
  "DAM" = "Main crop potatoes",
  "DAP" = "Chipped old potatoes",
  "DAR" = "Potato products",
  "DB" = "Beans and lentils",
  "DF" = "Peas",
  "DG" = "Vegetables, general",
  "DI" = "Vegetables, dried",
  "DR" = "Vegetable dishes",
  # F*: fruit
  "FA" = "Fruit, general",
  "FC" = "Fruit juices",
  # G*: nuts and seeds
  "GA" = "Nuts and seeds, general",
  # J*: fish and seafood
  "JA" = "White fish",
  "JC" = "Fatty fish",
  "JK" = "Crustacea",
  "JM" = "Molluscs",
  "JR" = "Fish products and dishes",
  # M*: meat, poultry, game and meat products
  "MA" = "Meat",
  "MAA" = "Bacon",
  "MAC" = "Beef",
  "MAE" = "Lamb",
  "MAG" = "Pork",
  "MAI" = "Veal",
  "MC" = "Poultry",
  "MCA" = "Chicken",
  "MCC" = "Duck",
  "MCE" = "Goose",
  "MCG" = "Grouse",
  "MCI" = "Partridge",
  "MCK" = "Pheasant",
  "MCM" = "Pigeon",
  "MCO" = "Turkey",
  "ME" = "Game",
  "MEA" = "Hare",
  "MEC" = "Rabbit",
  "MEE" = "Venison",
  "MG" = "Offal",
  "MBG" = "Burgers and grillsteaks",
  "MI" = "Meat products",
  "MIG" = "Other meat products",
  "MR" = "Meat dishes",
  # O*: fats and oils
  "OA" = "Spreading fats",
  "OB" = "Animal fats",
  "OC" = "Oils",
  "OE" = "Non-animal fats",
  "OF" = "Cooking fats",
  # P*: non-alcoholic drinks
  "PA" = "Powdered drinks, essences and infusions",
  "PAA" = "Powdered drinks and essences",
  "PAC" = "Infusions",
  "PC" = "Soft drinks",
  "PCA" = "Carbonated drinks",
  "PCC" = "Squash and cordials",
  "PE" = "Juices",
  # Q*: alcoholic drinks
  "QA" = "Beers",
  "QC" = "Ciders",
  "QE" = "Wines",
  "QF" = "Fortified wines",
  "QG" = "Vermouths",
  "QI" = "Liqueurs",
  "QK" = "Spirits",
  # S*: sugars, confectionery and snacks
  "SC" = "Sugars, syrups and preserves",
  "SE" = "Confectionery",
  "SEA" = "Chocolate confectionery",
  "SEC" = "Non-chocolate confectionery",
  "SN" = "Savoury snacks",
  "SNA" = "Potato-based snacks",
  "SNB" = "Potato and mixed cereal snacks",
  "SNC" = "Non-potato snacks",
  # W*: soups, sauces and miscellaneous foods
  "WA" = "Soups",
  "WAA" = "Homemade soups",
  "WAC" = "Canned soups",
  "WAE" = "Packet soups",
  "WC" = "Sauces",
  "WCD" = "Dairy sauces",
  "WCG" = "Salad sauces, dressings and pickles",
  "WCN" = "Non-salad sauces",
  "WE" = "Pickles and chutneys",
  "WY" = "Miscellaneous foods"
)
InĀ [42]:
# Spot-check one lookup by group code, then confirm the underlying type of
# the lookup object
lookup_list['WA']
typeof(lookup_list)
WA: 'Soups'
'character'
InĀ [43]:
# Preview the first few code -> name pairs
head(lookup_list)
AA
'Flours, grains and starches'
AB
'Sandwiches'
AC
'Rice'
AD
'Pasta'
AE
'Pizzas'
AF
'Breads'
InĀ [44]:
# Keep only the foods whose group code has a printable name in lookup_list
food_composition <- food_composition[food_composition$group %in% names(lookup_list), ]
# Translate the codes with one vectorised lookup by name. unname() drops the
# group codes that would otherwise be carried along as element names, so the
# new column is always a plain character vector (also when no rows are left).
food_composition$group_full_name <- unname(lookup_list[food_composition$group])
InĀ [45]:
# Preview the subset, which now carries the group_full_name column
head(food_composition)
A tibble: 6 Ɨ 6
food_nameaoac_fibre_genergy_kcal_kcaltotal_sugars_ggroupgroup_full_name
<chr><dbl><int><dbl><chr><chr>
Almonds, toasted 10.9579 5.1GANuts and seeds, general
Almonds, weighed with shells 4.6205 1.6GANuts and seeds, general
Almonds, whole kernels 12.5554 4.5GANuts and seeds, general
Anchovies, canned in oil, drained 0.0191 0.0JCFatty fish
Apple sauce, homemade 1.4 7920.2WCSauces
Apples, cooking, baked with sugar, flesh only 1.9 6917.1FAFruit, general

We have too many values to plot on a graph, so a grouping could make sense. We'll use the groups already provided with the dataset.

InĀ [46]:
# Average fibre and sugar content per food group, rounded to 3 decimals.
# round() already returns a numeric value, so no format()/as.numeric() round
# trip is needed (the former `nsmall = 3` was passed to as.numeric(), where it
# had no effect).
food_composition_by_g <- food_composition %>%
  group_by(group_full_name) %>%
  summarise(
    fibre_group_average = round(mean(aoac_fibre_g), 3),
    sugar_group_average = round(mean(total_sugars_g), 3)
  )

head(food_composition_by_g)
A tibble: 6 Ɨ 3
group_full_namefibre_group_averagesugar_group_average
<chr><dbl><dbl>
Bacon 0.005 0.118
Beans and lentils6.736 1.659
Beef 0.000 0.000
Biscuits 4.01724.795
Breads 3.868 3.844
Breakfast cereals6.28417.861
InĀ [47]:
# Rank the groups from highest to lowest average fibre and show the top 30
food_composition_by_g <- food_composition_by_g %>%
  arrange(desc(fibre_group_average))
head(food_composition_by_g, n = 30)
A tibble: 30 Ɨ 3
group_full_namefibre_group_averagesugar_group_average
<chr><dbl><dbl>
Vegetables, dried 22.00031.000
Non-potato snacks 8.400 1.900
Savoury snacks 7.114 3.829
Beans and lentils 6.736 1.659
Breakfast cereals 6.28417.861
Nuts and seeds, general 5.76010.413
Peas 4.370 3.290
Rolls 4.040 3.280
Biscuits 4.01724.795
Breads 3.868 3.844
Potato products 3.800 1.233
Potato-based snacks 3.650 1.200
Chipped old potatoes 3.367 0.922
Pastry 3.247 1.073
Packet soups 3.050 9.550
Pasta 3.018 2.218
Vegetables, general 2.720 4.901
Potatoes 2.700 0.800
Pizzas 2.560 2.560
Buns and pastries 2.54613.219
Infant cereal foods 2.40023.400
Vegetable dishes 2.351 3.178
Main crop potatoes 2.283 1.250
Chocolate confectionery 2.23055.170
Fruit, general 2.19014.770
Early potatoes 2.100 1.125
Rice 2.053 0.874
Puddings 2.00321.903
Powdered drinks and essences 1.95726.907
Cakes 1.85234.141
InĀ [48]:
# Box plot of the per-group average fibre content. The `text` aesthetic holds
# the hover label that ggplotly() shows as the tooltip.
# NOTE(review): `text` is a discrete aesthetic that differs for every row, so
# ggplot2 may treat each row as its own group here - confirm the rendered plot
# shows a single box (adding `group = 1` to aes() would force one).
box_plot <- food_composition_by_g %>%
  ggplot(
    aes(
      y = fibre_group_average,
      text = paste("Name:", group_full_name, "<br>Fiber:", fibre_group_average)
    )
  ) +
  # Light blue box with a black outline; outliers are drawn in red
  geom_boxplot(fill = "lightblue", color = "black", outlier.colour = "red") +
  labs(
    title = "Box Plot of AOAC Fiber (g)",
    y = "AOAC Fiber (g)"
  ) +
  theme_minimal()

# Wrap the static plot in an interactive plotly widget (800 x 600)
fig <- ggplotly(box_plot, tooltip = "text", width = 800, height = 600)

fig
InĀ [49]:
# Scatter plot of average sugar against average fibre, one point per food
# group. The `text` aesthetic holds the hover label shown by ggplotly().
scatter_plot <- food_composition_by_g %>%
  ggplot(
    aes(
      x = fibre_group_average,
      y = sugar_group_average,
      text = paste(
        "Name:", group_full_name,
        "<br>Fiber:", fibre_group_average,
        "<br>Sugar:", sugar_group_average
      )
    )
  ) +
  # Small, semi-transparent filled squares (shape 22) with a thin black border
  geom_point(
    shape = 22,
    size = 2,
    stroke = 0.2,
    alpha = 0.5,
    color = "black",
    fill = "#69b3a2"
  ) +
  theme_ipsum()

# Wrap the static plot in an interactive plotly widget (800 x 600)
fig_correl <- ggplotly(scatter_plot, tooltip = "text", width = 800, height = 600)

fig_correl
InĀ [50]:
# Export the cleaned per-food table and the per-group averages as CSV files
# (without the row-name column)
write.csv(
  food_composition,
  file = "/kaggle/working/food_composition_cleaned.csv",
  row.names = FALSE
)
write.csv(
  food_composition_by_g,
  file = "/kaggle/working/food_composition_grouped.csv",
  row.names = FALSE
)